energy <- read_csv(here("data", "energy.csv"))
# Wrangle data to get proper date column
energy_ts <- energy %>%
mutate(date = yearmonth(month)) %>%
as_tsibble(key = NULL, index = date)
ggplot(data = energy_ts, aes(x = date, y = res_total)) +
geom_line() +
labs(y = "Residential energy consumption \n (Trillion BTU)")
energy_ts %>%
gg_season(y = res_total) +
theme_minimal() +
labs(x = "month", y = "residential energy consumption (trillion BTU)")
energy_ts %>%
gg_subseries(res_total)
# Find STL decomposition
dcmp <- energy_ts %>%
model(STL(res_total ~ season()))
# Visualize the decomposed components
components(dcmp) %>%
autoplot() +
theme_minimal()
energy_ts %>%
ACF(res_total) %>%
autoplot()
# Create the model:
energy_fit <- energy_ts %>%
model(
ets = ETS(res_total ~ season("M"))
)
# Forecast using the model 10 years into the future:
energy_forecast <- energy_fit %>%
forecast(h = "10 years")
# Plot just the forecasted values (with 80 & 95% CIs):
energy_forecast %>%
autoplot()
# Or plot it added to the original data:
energy_forecast %>%
autoplot(energy_ts)
# Append the predicted values (and residuals) to original energy data
energy_predicted <- augment(energy_fit)
# Now plot the actual energy values (res_total), and the predicted values (stored as .fitted) atop them:
ggplot(data = energy_predicted) +
geom_line(aes(x = date, y = res_total)) +
geom_line(aes(x = date, y = .fitted, color = "pink"))
Explore the residuals next. Some important considerations: Residuals should be uncorrelated, centered at 0, and ideally normally distributed. One way we can check the distribution is with a histogram:
ggplot(data = energy_predicted, aes(x = .resid)) +
geom_histogram()
Relatively normally distributed, and centered at 0 (we could find summary statistics beyond this to further explore)
ETS forecasting, seasonal naive (SNAIVE) and autoregressive integrated moving average (ARIMA)
# Fit 3 different forecasting models (ETS, ARIMA, SNAIVE):
energy_fit_multi <- energy_ts %>%
model(
ets = ETS(res_total ~ season("M")),
arima = ARIMA(res_total),
snaive = SNAIVE(res_total)
)
# Forecast 3 years into the future (from data end date)
multi_forecast <- energy_fit_multi %>%
forecast(h = "3 years")
# Plot the 3 forecasts
multi_forecast %>%
autoplot(energy_ts)
# Or just view the forecasts (not the similarity across models):
multi_forecast %>%
autoplot()